## Loading required package: xml2
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
# Fetch raw content from `url`.
#
# url:  address of the resource to read.
# node: either the sentinel '.json', in which case `url` is handed to
#       fromJSON() and the parsed result is returned, or a selector that is
#       passed to html_nodes() after the page has been read with read_html().
#
# Returns the fromJSON() result for '.json', otherwise the html_nodes() result.
rawdata <- function(url, node) {
  wants_json <- node == '.json'

  if (!wants_json) {
    page <- read_html(url)
    matches <- html_nodes(page, node)
    return(matches)
  }

  fromJSON(url)
}
# pull deck data from tcgplayer
# =============================
#
# Walks pages 1..200 of the tcgplayer deck search (format=standard, start date
# 1-1-2012) and collects, for every row on each page, the text of the
# archetype link and the text of the date cell. The combined table is written
# to data/archetypes.csv.

archetype_node <- 'td:nth-child(2) a'
date_node <- 'td:nth-child(6)'

deck_pages <- lapply(seq_len(200), function(i) {
  url <- str_c('http://decks.tcgplayer.com/magic/deck/search?page=',i,'&format=standard&startdate=1-1-2012&p1=true&p2=true&p3t4=true&p5t8=true&p9t16=true&p17t32=true&p33t64=true')

  # Download the page once and run both selectors against the same document,
  # instead of issuing one HTTP request per selector.
  webpage <- read_html(url)

  # Keep the text between the first '>' and the following '<' of each link.
  archetype <- html_nodes(webpage, archetype_node) %>%
    str_extract('>(.*?)<') %>%
    str_replace('>', '') %>%
    str_replace('<', '')

  # Keep the text between the first newline and the following carriage
  # return of each date cell, then trim surrounding whitespace.
  date <- html_nodes(webpage, date_node) %>%
    str_extract('\\n(.*?)\\r') %>%
    str_replace('\\n', '') %>%
    str_replace('\\r', '') %>%
    str_trim("both")

  data.frame(archetype, date, stringsAsFactors = FALSE)
})

# Bind all pages in one step; growing a data frame with rbind() inside the
# loop re-copies the accumulated rows on every iteration.
archetypes <- do.call(rbind, deck_pages)

# write.csv() does not create missing directories.
dir.create('data', showWarnings = FALSE)
write.csv(archetypes, file = 'data/archetypes.csv')
# pull set release dates from scryfall
# ====================================
#
# Combines the set list from the Scryfall JSON API with the set codes and card
# counts scraped from the Scryfall sets page, classifies each set as large
# ('lg') or small ('sm'), and writes code, release date and size for sets
# released on or after 2011-01-01 to data/sets.csv.

scryfallapi <- 'https://api.scryfall.com/sets'

scryfallsets <- 'https://scryfall.com/sets'
ccountnode <- 'td:nth-child(4) a'
scodenode <- 'small'

mtgsets <- rawdata(scryfallapi,'.json')

# Download the sets page once and run both selectors against it.
sets_page <- read_html(scryfallsets)
setcounts <- data.frame(
  setcd = html_nodes(sets_page, scodenode) %>%
    str_replace('<small>','') %>%
    str_replace('</small>',''),
  cardct = html_nodes(sets_page, ccountnode) %>%
    str_extract('">(.*?)</a>') %>%
    str_replace('">','') %>%
    str_replace('</a>',''),
  stringsAsFactors = FALSE
)

# The scraped codes are matched against upper-cased API codes.
mtgsets$data$code <- toupper(mtgsets$data$code)
# Inner join: only sets present in both the API data and the scraped page
# are kept.
mtgsets <- merge( x = mtgsets$data , y = setcounts , by.x = 'code' , by.y = 'setcd' )

# Drop everything from the first 'T' onwards, leaving the part before it.
# Unlike extracting '(.*?)T', this leaves a value that has no 'T' (a plain
# date without a time component) unchanged instead of turning it into NA.
mtgsets$released_at <- mtgsets$released_at %>%
  str_replace('T.*$', '')

# Large sets: expansions whose block name equals the set name.
bigsets <- mtgsets %>%
  filter(toupper(block) == toupper(name) & set_type == 'expansion') %>%
  arrange(as.Date(released_at))

# Small sets: the remaining expansions, plus every core set.
smallsets <- mtgsets %>%
  filter( (toupper(block) != toupper(name) & set_type == 'expansion') | (set_type == 'core') ) %>%
  arrange(as.Date(released_at))

sm <- data.frame( scode = smallsets$code , sdate = smallsets$released_at , ssize = 'sm', stringsAsFactors = FALSE)
lg <- data.frame( scode = bigsets$code , sdate = bigsets$released_at , ssize = 'lg', stringsAsFactors = FALSE)

# Compare Date with Date rather than relying on implicit string conversion.
setoutput <- rbind(sm,lg) %>%
  filter(as.Date(sdate) >= as.Date('2011-01-01')) %>%
  arrange(as.Date(sdate))

# write.csv() does not create missing directories.
dir.create('data', showWarnings = FALSE)
write.csv(setoutput, file = "data/sets.csv", quote = FALSE)
# load scraped data from csv
# ==========================
#
# Builds a filled density plot of archetype share over time from
# data/archetypes.csv, with vertical lines at the set release dates read from
# data/sets.csv, and converts it to an interactive plotly widget.
archcsv <- read.csv('data/archetypes.csv', stringsAsFactors = FALSE) %>%
  mutate( deckdate = as.Date(date, '%m/%d/%Y'))

# Archetypes that occur in more than 75 rows. count() groups by its argument
# itself, so no preceding group_by() is needed.
prune <- archcsv %>%
  count(archetype) %>%
  filter(n > 75)

# Inner join keeps only the rows whose archetype passed the threshold.
graphdata <- merge( x = archcsv , y = prune , by.x = "archetype" , by.y = "archetype") %>%
  select(archetype,deckdate)

# Read and convert the set table once, then split it by size. Dates are
# turned into their numeric form for use as geom_vline() intercepts.
setdates <- read.csv('data/sets.csv', stringsAsFactors = FALSE) %>%
  mutate(sdate = as.numeric(as.Date(sdate)))

bigdates <- setdates %>%
  filter(ssize == 'lg') %>%
  select(sdate)

smalldates <- setdates %>%
  filter(ssize == 'sm') %>%
  select(sdate)

# fig.width / fig.height are knitr chunk options, not ggplot() arguments;
# ggplot() silently ignored them, so they are not passed here.
p <- ggplot(graphdata, aes( x = deckdate , y = ..count.. , fill = archetype)) +
  geom_density(position = "fill") +
  theme(legend.position = "none") +
  ylab('Archetype Prevalence') +
  xlab('Date') +
  # Solid black lines mark large-set releases, dotted gray lines small ones.
  geom_vline(xintercept = unlist(bigdates), linetype = "solid", color = "black") +
  geom_vline(xintercept = unlist(smalldates), linetype = "dotted", color = "gray")

# The widget size is given to ggplotly(); specifying width/height in layout()
# is deprecated by plotly.
metagraph <- ggplotly(p, width = 750, height = 500) %>%
  layout(autosize = FALSE)
## We recommend that you use the dev version of ggplot2 with `ggplotly()`
## Install it with: `devtools::install_github('hadley/ggplot2')`
## Warning: Specifying width/height in layout() is now deprecated.
## Please specify in ggplotly() or plot_ly()
metagraph